package com.o2o.cleaning.month.platform.ebusiness_plat.rongegou

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.elasticsearch.spark._

/**
  * Monthly export job: pulls one month of documents out of an Elasticsearch
  * index and stores them as ORC files under an `s3a://` prefix.
  *
  * Usage: `_01_ES_TO_OSS [year] [month]`. Both arguments are optional; when
  * omitted, the defaults below are used.
  */
object _01_ES_TO_OSS {

  /** Year used when no first command-line argument is given. */
  private val DefaultYear = "2021"

  /** Month used when no second command-line argument is given. */
  private val DefaultMonth = "12"

  /**
    * Entry point. Builds a local Spark session configured for the Elasticsearch
    * cluster and the S3A-compatible object store, then exports the requested month.
    *
    * @param args optional `year` (args(0)) and `month` (args(1)); the values are
    *             inserted verbatim into the index name and the output path
    */
  def main(args: Array[String]): Unit = {

    // NOTE(review): the Elasticsearch user/password below are committed in plain
    // text. They should be supplied from external configuration and rotated.
    val spark = SparkSession.builder()
      .appName("CheckDataDetail")
      .config("spark.debug.maxToStringFields", "2000")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.sql.caseSensitive", "true")
      .config("es.nodes", "192.168.1.29")
      .config("es.port", "9200")
      .config("cluster.name", "O2OElastic")
      .config("es.net.http.auth.user", "elastic")
      .config("es.net.http.auth.pass", "changeme")
      .master("local[*]")
      .getOrCreate()

    try {
      val sc = spark.sparkContext
      // NOTE(review): object-store access/secret keys are committed in plain text.
      // They should be supplied from external configuration and rotated.
      sc.hadoopConfiguration.set("fs.s3a.access.key", "GAO7EO9FWKPJ8WFCQDME")
      sc.hadoopConfiguration.set("fs.s3a.secret.key", "LZ0xaHBSYKHaJ9ECDbX9f7zin79UZkXfGoNapRPL")
      sc.hadoopConfiguration.set("fs.s3a.endpoint", "https://obs.cn-north-1.myhuaweicloud.com")
      sc.setLogLevel("WARN")

      // Take the period from the command line so a monthly run does not require
      // editing the source; fall back to the previous hard-coded values.
      val year = args.lift(0).getOrElse(DefaultYear)
      val month = args.lift(1).getOrElse(DefaultMonth)

      /**
        * Needs to be run once per month: set the parameters and run it to pull
        * the ES data down. (The original note says the data is then synced to
        * "holo"; this file itself only writes ORC to s3a.)
        *
        * A copy of the raw data is kept under the mongodb directory.
        * Raw data path: "s3a://o2o-sourcedata-2021/obs-source-2021/2021/${month}/${platform}/${collection}"
        */
      fun(spark, sc, year, month)
    } finally {
      // Release the Spark session even when the export fails.
      spark.stop()
    }
  }

  /**
    * Reads every document of the Elasticsearch resource
    * `${year}_rongyigou/rongyigou_${year}_${month}` as raw JSON, lets Spark infer
    * the schema, and writes the result as ORC to
    * `s3a://o2o-dataproces-group/zyf/icbc/essource/${year}/${month}/`.
    *
    * The data is repartitioned to a single partition before writing. No save
    * mode is set, so Spark's default applies to an already existing target.
    *
    * @param spark session used to parse the JSON and write the ORC output
    * @param sc    Spark context used to read from Elasticsearch
    * @param year  year component of the index name and output path
    * @param month month component of the index name and output path
    */
  def fun(spark: SparkSession, sc: SparkContext, year: String, month: String): Unit = {
    val index = s"${year}_rongyigou/rongyigou_${year}_${month}"
    println(s"----------${index}----------")
    // esJsonRDD yields (documentId, jsonSource) pairs; only the JSON is needed.
    val data = sc.esJsonRDD(index).values
    spark.read.json(data)
      .repartition(1)
      .write
      .orc(s"s3a://o2o-dataproces-group/zyf/icbc/essource/${year}/${month}/")
  }

  /**
    * Converts the JSON files under
    * `s3a://o2o-dataproces-group/zyf/rongyigou/mongosource/${year}/${month}/`
    * into ORC under
    * `s3a://o2o-dataproces-group/zyf/rongyigou/mongoORCsource/${year}/${month}/`,
    * repartitioned to a single partition.
    *
    * @param spark session used to read the JSON and write the ORC output
    * @param sc    unused; kept so existing callers keep compiling
    * @param year  year component of the input and output paths
    * @param month month component of the input and output paths
    */
  def jsonToorc(spark: SparkSession, sc: SparkContext, year: Int, month: Int): Unit = {
    println(s"${year}---${month}")
    spark.read.json(s"s3a://o2o-dataproces-group/zyf/rongyigou/mongosource/${year}/${month}/")
      .repartition(1)
      .write
      .orc(s"s3a://o2o-dataproces-group/zyf/rongyigou/mongoORCsource/${year}/${month}/")
  }
}
